*************************************************************************
*																		*
*					Campaign Panel Book Merge Data 2009 and 2013		*
*																		*
*************************************************************************	

/*
Since the datasets for the election campaigns 2009 and 2013 only exist separately, they are merged in this do-file. 
*/

* Session setup: suppress --more-- prompts, empty memory, raise limits,
* and switch to the data directory held in the global macro "data"
set more off

clear
clear matrix
clear mata

set scrollbufsize 2000000
set maxvar 32767		// issued after -clear-, i.e. with no data in memory

cd "$data"

*Merge the two separate datasets derived from the official GESIS homepage
*We apply version 5.0.0 of the short-term campaign panel data 2009 and 3.2.0 of the short-term campaign panel data 2013 for the analysis


*Step 1: prepare the 2009 dataset - prefix every variable with a_ and rename the respondent ID to lfdn09
use "ZA5305_en_v5-0-0", clear

*Prefix all variables with a_ in a single group rename so that they can be identified as 2009 variables
rename * a_*

*The ID received the prefix as well; give it its wave-specific name
rename a_lfdn lfdn09

save "data2009_merge", replace




*Step 2: prepare the 2013 dataset - prefix every variable with b_ and rename the respondent ID to lfdn13
use "ZA5704_en_v3-2-0", clear

*Prefix all variables with b_ in a single group rename so that they can be identified as 2013 variables
rename * b_*

*The ID received the prefix as well; give it its wave-specific name
rename b_lfdn lfdn13

*Replace the delivered participation variable by the sum of the seven b_w?b variables
*(the sum is missing whenever one of its components is missing)
drop b_teilnahme

gen b_teilnahme = b_w1b + b_w2b + b_w3b + b_w4b ///
	+ b_w5b + b_w6b + b_w7b

save "data2013_merge", replace


***Now we prepare the merge of the two datasets

*Build the ID crosswalk (lfdn09 <-> lfdn13) that identifies respondents participating in both short-term panels
*Only the two ID variables are loaded from the linking file
use lfdn09 lfdn13 using "ZA5757_v1-1-0.dta", clear
sort lfdn09
save "corresponding_lfdn.dta", replace

*Attach the 2013 ID from the crosswalk to the 2009 data
use "data2009_merge.dta", clear
merge 1:1 lfdn09 using "corresponding_lfdn.dta"
tab _merge, m
drop _merge

*Every row needs a non-missing lfdn13 as merge key: rows without a 2013 ID get the placeholder 6000 + row number
*(this presumes that no real lfdn13 lies in the placeholder range - TODO confirm against the 2013 data)
gen test_lfdn13 = cond(lfdn13 != ., lfdn13, 6000 + _n)
tab test_lfdn13, m

*Keep the original crosswalk value as lfdn13_org; the filled variable becomes the new lfdn13
rename (lfdn13 test_lfdn13) (lfdn13_org lfdn13)
sort lfdn13
save "data2009_merge+lfdn13.dta", replace

*Match the 2009 data (in memory) with the 2013 data on lfdn13
merge 1:1 lfdn13 using "data2013_merge.dta"
tab _merge, m

*Safety check: rows without a real 2013 ID carry a placeholder lfdn13 (6000 + row number) and a missing lfdn13_org.
*If such a row is matched, a placeholder coincided with a real 2013 ID and two unrelated respondents were joined.
*Abort in that case instead of silently continuing with wrongly linked cases.
quietly count if _merge == 3 & missing(lfdn13_org)
if r(N) > 0 {
	display as error "merge on lfdn13: " r(N) " placeholder ID(s) matched real 2013 respondents - increase the placeholder offset"
	exit 459
}

drop _merge 



*Restrict the sample to respondents who participated in all seven waves in either 2009 or 2013
*Due to mistakes in the data generating process in 2009, several respondents had to be invited for the second wave;
*for these respondents (a_w1==0) participation in the last six waves of 2009 is sufficient

*Cross-tabulate participation before and after the restriction as a check
tab a_teilnahme b_teilnahme, m
drop if !( b_teilnahme==7 | (a_teilnahme==7 & a_w1==1) | (a_teilnahme==6 & a_w1==0) )
tab a_teilnahme b_teilnahme, m


save "data2009_2013 participants 7 waves.dta", replace


*Generate a sequential ID for the common dataset (stored as an integer type)
gen long lfdn = _n

* Create indicators (1 / missing) that flag the cases to be included for each election year
* NOTE: -fre- is not a built-in Stata command (presumably the user-written package from SSC); it has to be installed
gen include09=1 if a_teilnahme==7 | (a_teilnahme==6 & a_w2==1 & a_w1~=1)
fre include09
gen include13=1 if b_teilnahme==7
fre include13


/* 4804 observations in total */

* Choose appropriate names for weights
* -generate- stores new variables as float by default; double keeps the full precision of the source weights
gen double a_weight = a_gew_q1_ges
gen double b_weight = b_wei_mz

*Overwrite the previously saved file so that it also contains the ID, the indicators and the weights
save "data2009_2013 participants 7 waves.dta", replace
